Kei Hayakawa created this text partially with GPT-4o and GPT-3.5, large-scale language-generation models from OpenAI. The author reviewed and modified the language and takes responsibility for this text.
# Install the required packages only when they are missing, so that
# re-running the script does not download and reinstall them every time.
# NOTE(review): BiocManager is not used in the visible part of this script;
# it is still installed here to keep the original setup.
for (pkg in c("BiocManager", "tidyverse")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# libraries
library(dplyr)
library(ggplot2)
# Start from an empty workspace. This removes objects only (including the
# loop variable above); the packages attached above stay attached.
rm(list = ls())
# open the documentation of the CO2 data set
help("CO2")
# check the contents
CO2
Grouped Data: uptake ~ conc | Plant
Plant Type Treatment conc uptake
1 Qn1 Quebec nonchilled 95 16.0
2 Qn1 Quebec nonchilled 175 30.4
3 Qn1 Quebec nonchilled 250 34.8
4 Qn1 Quebec nonchilled 350 37.2
5 Qn1 Quebec nonchilled 500 35.3
6 Qn1 Quebec nonchilled 675 39.2
7 Qn1 Quebec nonchilled 1000 39.7
8 Qn2 Quebec nonchilled 95 13.6
9 Qn2 Quebec nonchilled 175 27.3
10 Qn2 Quebec nonchilled 250 37.1
11 Qn2 Quebec nonchilled 350 41.8
12 Qn2 Quebec nonchilled 500 40.6
13 Qn2 Quebec nonchilled 675 41.4
14 Qn2 Quebec nonchilled 1000 44.3
15 Qn3 Quebec nonchilled 95 16.2
16 Qn3 Quebec nonchilled 175 32.4
17 Qn3 Quebec nonchilled 250 40.3
18 Qn3 Quebec nonchilled 350 42.1
19 Qn3 Quebec nonchilled 500 42.9
20 Qn3 Quebec nonchilled 675 43.9
21 Qn3 Quebec nonchilled 1000 45.5
22 Qc1 Quebec chilled 95 14.2
23 Qc1 Quebec chilled 175 24.1
24 Qc1 Quebec chilled 250 30.3
25 Qc1 Quebec chilled 350 34.6
26 Qc1 Quebec chilled 500 32.5
27 Qc1 Quebec chilled 675 35.4
28 Qc1 Quebec chilled 1000 38.7
29 Qc2 Quebec chilled 95 9.3
30 Qc2 Quebec chilled 175 27.3
31 Qc2 Quebec chilled 250 35.0
32 Qc2 Quebec chilled 350 38.8
33 Qc2 Quebec chilled 500 38.6
34 Qc2 Quebec chilled 675 37.5
35 Qc2 Quebec chilled 1000 42.4
36 Qc3 Quebec chilled 95 15.1
37 Qc3 Quebec chilled 175 21.0
38 Qc3 Quebec chilled 250 38.1
39 Qc3 Quebec chilled 350 34.0
40 Qc3 Quebec chilled 500 38.9
41 Qc3 Quebec chilled 675 39.6
42 Qc3 Quebec chilled 1000 41.4
43 Mn1 Mississippi nonchilled 95 10.6
44 Mn1 Mississippi nonchilled 175 19.2
45 Mn1 Mississippi nonchilled 250 26.2
46 Mn1 Mississippi nonchilled 350 30.0
47 Mn1 Mississippi nonchilled 500 30.9
48 Mn1 Mississippi nonchilled 675 32.4
49 Mn1 Mississippi nonchilled 1000 35.5
50 Mn2 Mississippi nonchilled 95 12.0
51 Mn2 Mississippi nonchilled 175 22.0
52 Mn2 Mississippi nonchilled 250 30.6
53 Mn2 Mississippi nonchilled 350 31.8
54 Mn2 Mississippi nonchilled 500 32.4
55 Mn2 Mississippi nonchilled 675 31.1
56 Mn2 Mississippi nonchilled 1000 31.5
57 Mn3 Mississippi nonchilled 95 11.3
58 Mn3 Mississippi nonchilled 175 19.4
59 Mn3 Mississippi nonchilled 250 25.8
60 Mn3 Mississippi nonchilled 350 27.9
61 Mn3 Mississippi nonchilled 500 28.5
62 Mn3 Mississippi nonchilled 675 28.1
63 Mn3 Mississippi nonchilled 1000 27.8
64 Mc1 Mississippi chilled 95 10.5
65 Mc1 Mississippi chilled 175 14.9
66 Mc1 Mississippi chilled 250 18.1
67 Mc1 Mississippi chilled 350 18.9
68 Mc1 Mississippi chilled 500 19.5
69 Mc1 Mississippi chilled 675 22.2
70 Mc1 Mississippi chilled 1000 21.9
71 Mc2 Mississippi chilled 95 7.7
72 Mc2 Mississippi chilled 175 11.4
73 Mc2 Mississippi chilled 250 12.3
74 Mc2 Mississippi chilled 350 13.0
75 Mc2 Mississippi chilled 500 12.5
76 Mc2 Mississippi chilled 675 13.7
77 Mc2 Mississippi chilled 1000 14.4
78 Mc3 Mississippi chilled 95 10.6
79 Mc3 Mississippi chilled 175 18.0
80 Mc3 Mississippi chilled 250 17.9
81 Mc3 Mississippi chilled 350 17.9
82 Mc3 Mississippi chilled 500 17.9
83 Mc3 Mississippi chilled 675 18.9
84 Mc3 Mississippi chilled 1000 19.9
# Mean and median CO2 uptake for each level of Type (the plant's origin)
CO2 |>
  group_by(Type) |>
  summarise(average = mean(uptake), median = median(uptake))
# clear objects: removes every object in the current environment
# (attached packages are not affected)
rm(list = ls())
# Ratio of the mean to the median of a vector.
#
# vect: a numeric vector.
# Returns mean(vect) / median(vect).
mean_and_median_ratio <- function(vect) {
  centre_mean <- mean(vect)
  centre_median <- median(vect)
  centre_mean / centre_median
}
# e.g. the mean and the median of c(10, 20, 30) are both 20, so the ratio is 1
vect1 <- c(10, 20, 30)
vect2 <- mean_and_median_ratio(vect1)
vect2
[1] 1
# Mean of a vector after discarding its extreme values.
#
# vect: a numeric vector.
# Every element equal to the maximum or to the minimum is dropped (all
# occurrences, not just one), and the mean of the remaining elements is
# returned. If nothing remains, the result is NaN.
mean_without_max_min <- function(vect) {
  largest <- max(vect)
  smallest <- min(vect)
  is_extreme <- vect == largest | vect == smallest
  mean(vect[!is_extreme])
}
# e.g. 0 and 10000 are dropped, leaving the mean of 10, 20, 30, 40, 50
vect3 <- c(0, 10, 20, 30, 40, 50, 10000)
vect4 <- mean_without_max_min(vect3)
vect4
[1] 30
Piping is useful when we want to shorten code that chains multiple steps, but we have to be careful when the number of steps becomes large. In that case piping makes scripts less readable for other developers and also makes it harder to find errors. I think about three pipes in a row is the maximum. We can create new variables and objects for intermediate results to avoid overly long pipelines.
Apply-family functions are used when we want to apply a function to the rows or columns of a data frame, or to the elements of a list, without writing an explicit iterative process. An iterative process such as a `for` loop makes scripts longer and more complex to read. Using apply-family functions makes scripts much easier to read and easier to modify.
# import magic_guys.csv file to d1
# NOTE(review): the path is absolute and specific to one machine; the script
# will fail elsewhere unless the file exists at exactly this location.
d1 <- read.csv("/Users/kei.h/Desktop/KI_2024/ki_data/magic_guys.csv")
# check the contents
d1
# Histogram of length with 10 bins, bars filled by species
d1_hist <- ggplot(d1, aes(x = length, fill = species)) +
  geom_histogram(bins = 10) +
  theme_bw()
# draw the histogram
plot(d1_hist)
# Boxplot of length for each species, boxes filled by species
d1_boxplot <- ggplot(
  d1,
  aes(x = species, y = length, fill = species)
) +
  geom_boxplot() +
  theme_bw()
# draw the boxplot
plot(d1_boxplot)
# Optional export of the plot (left disabled):
# as png
# ggsave("magic_guys_boxplot.png")
# as pdf
# ggsave("magic_guys_boxplot.pdf")
# as svg
# ggsave("magic_guys_boxplot.svg")
# Read the tab-separated microarray table. The first line holds the column
# names, and empty fields are read as NA.
d2 <- read.table(
  "/Users/kei.h/Desktop/KI_2024/ki_data/microarray_data.tab",
  header = TRUE,
  sep = "\t",
  na.strings = ""
)
# check the contents
d2
# data size: number of rows, then number of columns
d2_size <- dim(d2)
d2_size
[1] 553 1000
Calculate the number of missing values of each gene
# calculate the number of missing values per column of d2
# (is.na() marks each NA cell, colSums() counts the marks in each column)
missing_gene <- colSums(is.na(d2))
missing_gene
g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 g15 g16 g17 g18 g19 g20 g21 g22
130 104 74 93 81 30 31 26 12 56 72 105 55 133 75 67 21 553 13 9 207 190
g23 g24 g25 g26 g27 g28 g29 g30 g31 g32 g33 g34 g35 g36 g37 g38 g39 g40 g41 g42 g43 g44
96 141 122 149 47 59 210 22 14 52 14 24 70 83 64 91 144 124 216 64 13 70
g45 g46 g47 g48 g49 g50 g51 g52 g53 g54 g55 g56 g57 g58 g59 g60 g61 g62 g63 g64 g65 g66
61 108 68 404 106 91 230 231 207 195 553 196 186 553 224 362 198 217 208 210 188 351
g67 g68 g69 g70 g71 g72 g73 g74 g75 g76 g77 g78 g79 g80 g81 g82 g83 g84 g85 g86 g87 g88
187 189 187 194 185 185 354 203 216 216 245 201 553 189 188 224 553 213 188 194 192 213
g89 g90 g91 g92 g93 g94 g95 g96 g97 g98 g99 g100 g101 g102 g103 g104 g105 g106 g107 g108 g109 g110
219 229 365 223 369 353 195 230 250 238 553 204 228 199 192 211 373 251 261 207 362 187
g111 g112 g113 g114 g115 g116 g117 g118 g119 g120 g121 g122 g123 g124 g125 g126 g127 g128 g129 g130 g131 g132
200 209 210 224 203 205 189 212 236 260 33 26 25 22 14 48 36 42 18 194 213 367
g133 g134 g135 g136 g137 g138 g139 g140 g141 g142 g143 g144 g145 g146 g147 g148 g149 g150 g151 g152 g153 g154
198 199 553 185 553 553 191 186 14 177 8 13 17 8 186 202 21 30 198 186 207 185
g155 g156 g157 g158 g159 g160 g161 g162 g163 g164 g165 g166 g167 g168 g169 g170 g171 g172 g173 g174 g175 g176
188 196 215 186 188 187 36 27 28 18 212 36 8 23 33 23 226 376 200 215 193 214
g177 g178 g179 g180 g181 g182 g183 g184 g185 g186 g187 g188 g189 g190 g191 g192 g193 g194 g195 g196 g197 g198
185 224 203 193 10 9 10 18 32 47 31 44 15 20 36 35 36 61 20 188 12 8
g199 g200 g201 g202 g203 g204 g205 g206 g207 g208 g209 g210 g211 g212 g213 g214 g215 g216 g217 g218 g219 g220
12 187 12 7 19 215 15 12 8 11 23 173 9 14 7 29 15 36 15 10 16 10
g221 g222 g223 g224 g225 g226 g227 g228 g229 g230 g231 g232 g233 g234 g235 g236 g237 g238 g239 g240 g241 g242
56 31 83 52 27 27 12 13 38 44 17 24 188 11 36 30 39 55 64 38 81 66
g243 g244 g245 g246 g247 g248 g249 g250 g251 g252 g253 g254 g255 g256 g257 g258 g259 g260 g261 g262 g263 g264
65 61 24 26 47 26 36 23 47 203 2 20 17 8 7 70 34 392 31 49 67 60
g265 g266 g267 g268 g269 g270 g271 g272 g273 g274 g275 g276 g277 g278 g279 g280 g281 g282 g283 g284 g285 g286
38 36 18 61 38 32 12 14 15 61 24 22 8 3 26 89 67 18 27 92 59 65
g287 g288 g289 g290 g291 g292 g293 g294 g295 g296 g297 g298 g299 g300 g301 g302 g303 g304 g305 g306 g307 g308
65 57 53 410 17 97 15 60 68 106 127 64 81 39 388 34 10 25 14 18 40 55
g309 g310 g311 g312 g313 g314 g315 g316 g317 g318 g319 g320 g321 g322 g323 g324 g325 g326 g327 g328 g329 g330
57 47 92 61 82 89 70 83 11 15 30 98 115 74 57 90 54 53 72 51 553 19
g331 g332 g333 g334 g335 g336 g337 g338 g339 g340 g341 g342 g343 g344 g345 g346 g347 g348 g349 g350 g351 g352
91 147 113 116 130 60 84 47 70 34 18 23 25 150 48 54 56 58 46 33 238 553
g353 g354 g355 g356 g357 g358 g359 g360 g361 g362 g363 g364 g365 g366 g367 g368 g369 g370 g371 g372 g373 g374
220 200 388 216 218 211 212 205 192 370 371 208 200 191 240 553 186 248 50 65 45 71
g375 g376 g377 g378 g379 g380 g381 g382 g383 g384 g385 g386 g387 g388 g389 g390 g391 g392 g393 g394 g395 g396
20 29 74 93 239 59 212 205 409 204 207 225 241 358 553 553 402 57 53 55 45 211
g397 g398 g399 g400 g401 g402 g403 g404 g405 g406 g407 g408 g409 g410 g411 g412 g413 g414 g415 g416 g417 g418
22 47 39 136 208 200 191 210 248 367 195 196 196 210 57 29 84 40 117 62 553 210
g419 g420 g421 g422 g423 g424 g425 g426 g427 g428 g429 g430 g431 g432 g433 g434 g435 g436 g437 g438 g439 g440
91 41 75 13 69 33 59 55 24 32 94 79 553 359 243 185 190 215 206 230 205 391
g441 g442 g443 g444 g445 g446 g447 g448 g449 g450 g451 g452 g453 g454 g455 g456 g457 g458 g459 g460 g461 g462
6 8 47 17 132 12 30 17 37 195 17 9 86 30 202 10 20 33 63 61 392 356
g463 g464 g465 g466 g467 g468 g469 g470 g471 g472 g473 g474 g475 g476 g477 g478 g479 g480 g481 g482 g483 g484
195 190 220 229 188 189 205 245 10 28 37 6 21 190 70 9 110 23 87 29 80 30
g485 g486 g487 g488 g489 g490 g491 g492 g493 g494 g495 g496 g497 g498 g499 g500 g501 g502 g503 g504 g505 g506
14 23 14 11 23 33 215 187 201 190 188 221 224 553 190 192 39 70 10 9 36 29
g507 g508 g509 g510 g511 g512 g513 g514 g515 g516 g517 g518 g519 g520 g521 g522 g523 g524 g525 g526 g527 g528
64 9 13 189 17 16 184 72 214 43 39 217 553 190 48 200 4 63 61 27 553 36
g529 g530 g531 g532 g533 g534 g535 g536 g537 g538 g539 g540 g541 g542 g543 g544 g545 g546 g547 g548 g549 g550
22 73 553 387 197 192 209 207 238 553 185 201 56 41 14 189 8 12 224 25 13 46
g551 g552 g553 g554 g555 g556 g557 g558 g559 g560 g561 g562 g563 g564 g565 g566 g567 g568 g569 g570 g571 g572
18 26 77 23 80 65 28 203 96 44 227 11 56 66 40 54 94 55 204 198 203 417
g573 g574 g575 g576 g577 g578 g579 g580 g581 g582 g583 g584 g585 g586 g587 g588 g589 g590 g591 g592 g593 g594
249 192 377 553 553 190 189 197 28 19 188 8 386 197 15 29 93 11 181 57 22 28
g595 g596 g597 g598 g599 g600 g601 g602 g603 g604 g605 g606 g607 g608 g609 g610 g611 g612 g613 g614 g615 g616
105 30 59 14 188 3 23 27 2 46 21 19 63 50 18 85 187 219 212 210 553 190
g617 g618 g619 g620 g621 g622 g623 g624 g625 g626 g627 g628 g629 g630 g631 g632 g633 g634 g635 g636 g637 g638
209 234 356 198 8 20 32 15 16 26 24 10 12 21 67 27 11 12 17 14 25 76
g639 g640 g641 g642 g643 g644 g645 g646 g647 g648 g649 g650 g651 g652 g653 g654 g655 g656 g657 g658 g659 g660
12 43 14 11 49 14 16 10 17 26 10 211 13 16 75 52 11 7 216 9 26 70
g661 g662 g663 g664 g665 g666 g667 g668 g669 g670 g671 g672 g673 g674 g675 g676 g677 g678 g679 g680 g681 g682
13 12 398 8 16 71 11 14 372 8 14 77 22 13 40 12 25 50 36 11 59 14
g683 g684 g685 g686 g687 g688 g689 g690 g691 g692 g693 g694 g695 g696 g697 g698 g699 g700 g701 g702 g703 g704
21 17 11 32 19 36 224 3 199 6 5 183 10 63 7 32 7 56 43 39 32 26
g705 g706 g707 g708 g709 g710 g711 g712 g713 g714 g715 g716 g717 g718 g719 g720 g721 g722 g723 g724 g725 g726
54 20 104 15 56 35 108 34 22 32 69 25 29 69 60 23 26 68 24 101 10 58
g727 g728 g729 g730 g731 g732 g733 g734 g735 g736 g737 g738 g739 g740 g741 g742 g743 g744 g745 g746 g747 g748
14 13 18 18 41 32 10 25 37 19 23 17 12 31 19 11 59 190 33 21 69 66
g749 g750 g751 g752 g753 g754 g755 g756 g757 g758 g759 g760 g761 g762 g763 g764 g765 g766 g767 g768 g769 g770
58 14 553 187 553 204 187 210 193 212 208 231 191 188 235 188 215 355 195 356 199 187
g771 g772 g773 g774 g775 g776 g777 g778 g779 g780 g781 g782 g783 g784 g785 g786 g787 g788 g789 g790 g791 g792
29 38 41 23 12 72 21 40 34 48 191 222 219 227 209 221 223 553 249 195 13 25
g793 g794 g795 g796 g797 g798 g799 g800 g801 g802 g803 g804 g805 g806 g807 g808 g809 g810 g811 g812 g813 g814
15 22 63 90 4 17 51 33 242 553 189 385 189 205 211 209 235 219 29 89 22 146
g815 g816 g817 g818 g819 g820 g821 g822 g823 g824 g825 g826 g827 g828 g829 g830 g831 g832 g833 g834 g835 g836
20 55 16 80 28 74 32 107 18 91 28 22 22 18 38 23 192 229 227 186 186 207
g837 g838 g839 g840 g841 g842 g843 g844 g845 g846 g847 g848 g849 g850 g851 g852 g853 g854 g855 g856 g857 g858
191 553 196 200 17 9 57 33 16 13 21 20 189 201 388 34 9 386 35 10 11 14
g859 g860 g861 g862 g863 g864 g865 g866 g867 g868 g869 g870 g871 g872 g873 g874 g875 g876 g877 g878 g879 g880
17 26 186 223 236 553 228 188 216 191 217 211 36 195 27 77 23 32 23 38 13 38
g881 g882 g883 g884 g885 g886 g887 g888 g889 g890 g891 g892 g893 g894 g895 g896 g897 g898 g899 g900 g901 g902
41 57 15 71 25 37 53 18 20 38 189 553 553 202 194 225 252 393 244 225 11 19
g903 g904 g905 g906 g907 g908 g909 g910 g911 g912 g913 g914 g915 g916 g917 g918 g919 g920 g921 g922 g923 g924
85 35 15 35 35 34 108 191 79 30 20 19 52 15 30 35 553 34 48 71 28 55
g925 g926 g927 g928 g929 g930 g931 g932 g933 g934 g935 g936 g937 g938 g939 g940 g941 g942 g943 g944 g945 g946
44 192 19 10 30 26 185 376 224 232 192 250 217 202 228 199 18 17 22 10 70 32
g947 g948 g949 g950 g951 g952 g953 g954 g955 g956 g957 g958 g959 g960 g961 g962 g963 g964 g965 g966 g967 g968
199 69 36 13 187 57 31 12 21 68 26 26 16 19 14 53 56 20 83 10 11 14
g969 g970 g971 g972 g973 g974 g975 g976 g977 g978 g979 g980 g981 g982 g983 g984 g985 g986 g987 g988 g989 g990
26 191 553 225 228 222 211 227 184 195 205 354 11 50 10 32 188 13 13 14 201 21
g991 g992 g993 g994 g995 g996 g997 g998 g999 g1000
13 13 20 29 12 33 18 19 24 17
Make a data frame of gene names, missing values and missing value ratio
# Summary table with one row per column of d2: its name, its number of
# missing values, and that number divided by the number of rows of d2.
missing_d <- data.frame(
  gene_name = names(missing_gene),
  missing_values = as.numeric(missing_gene),
  missing_value_ratio = as.numeric(missing_gene) / nrow(d2)
)
# check the contents
missing_d
Visualizing
# Histogram (10 bins) of the missing value ratio
missingg_hist <- ggplot(missing_d, aes(x = missing_value_ratio)) +
  geom_histogram(bins = 10) +
  theme_bw()
plot(missingg_hist)
# Thresholds on the missing value ratio: 10%, 20% and 50%
c1 <- c(0.1, 0.2, 0.5)
# For each threshold, collect the names of the genes whose missing value
# ratio is strictly greater than that threshold (one list element each).
high_gene <- lapply(c1, function(criteria) {
  exceeds <- missing_d$missing_value_ratio > criteria
  missing_d$gene_name[exceeds]
})
# check the contents, >10%, >20%, >50%
high_gene
[[1]]
[1] "g1" "g2" "g3" "g4" "g5" "g10" "g11" "g12" "g14" "g15" "g16" "g18" "g21" "g22" "g23" "g24" "g25" "g26"
[19] "g28" "g29" "g35" "g36" "g37" "g38" "g39" "g40" "g41" "g42" "g44" "g45" "g46" "g47" "g48" "g49" "g50" "g51"
[37] "g52" "g53" "g54" "g55" "g56" "g57" "g58" "g59" "g60" "g61" "g62" "g63" "g64" "g65" "g66" "g67" "g68" "g69"
[55] "g70" "g71" "g72" "g73" "g74" "g75" "g76" "g77" "g78" "g79" "g80" "g81" "g82" "g83" "g84" "g85" "g86" "g87"
[73] "g88" "g89" "g90" "g91" "g92" "g93" "g94" "g95" "g96" "g97" "g98" "g99" "g100" "g101" "g102" "g103" "g104" "g105"
[91] "g106" "g107" "g108" "g109" "g110" "g111" "g112" "g113" "g114" "g115" "g116" "g117" "g118" "g119" "g120" "g130" "g131" "g132"
[109] "g133" "g134" "g135" "g136" "g137" "g138" "g139" "g140" "g142" "g147" "g148" "g151" "g152" "g153" "g154" "g155" "g156" "g157"
[127] "g158" "g159" "g160" "g165" "g171" "g172" "g173" "g174" "g175" "g176" "g177" "g178" "g179" "g180" "g194" "g196" "g200" "g204"
[145] "g210" "g221" "g223" "g233" "g239" "g241" "g242" "g243" "g244" "g252" "g258" "g260" "g263" "g264" "g268" "g274" "g280" "g281"
[163] "g284" "g285" "g286" "g287" "g288" "g290" "g292" "g294" "g295" "g296" "g297" "g298" "g299" "g301" "g309" "g311" "g312" "g313"
[181] "g314" "g315" "g316" "g320" "g321" "g322" "g323" "g324" "g327" "g329" "g331" "g332" "g333" "g334" "g335" "g336" "g337" "g339"
[199] "g344" "g347" "g348" "g351" "g352" "g353" "g354" "g355" "g356" "g357" "g358" "g359" "g360" "g361" "g362" "g363" "g364" "g365"
[217] "g366" "g367" "g368" "g369" "g370" "g372" "g374" "g377" "g378" "g379" "g380" "g381" "g382" "g383" "g384" "g385" "g386" "g387"
[235] "g388" "g389" "g390" "g391" "g392" "g396" "g400" "g401" "g402" "g403" "g404" "g405" "g406" "g407" "g408" "g409" "g410" "g411"
[253] "g413" "g415" "g416" "g417" "g418" "g419" "g421" "g423" "g425" "g429" "g430" "g431" "g432" "g433" "g434" "g435" "g436" "g437"
[271] "g438" "g439" "g440" "g445" "g450" "g453" "g455" "g459" "g460" "g461" "g462" "g463" "g464" "g465" "g466" "g467" "g468" "g469"
[289] "g470" "g476" "g477" "g479" "g481" "g483" "g491" "g492" "g493" "g494" "g495" "g496" "g497" "g498" "g499" "g500" "g502" "g507"
[307] "g510" "g513" "g514" "g515" "g518" "g519" "g520" "g522" "g524" "g525" "g527" "g530" "g531" "g532" "g533" "g534" "g535" "g536"
[325] "g537" "g538" "g539" "g540" "g541" "g544" "g547" "g553" "g555" "g556" "g558" "g559" "g561" "g563" "g564" "g567" "g569" "g570"
[343] "g571" "g572" "g573" "g574" "g575" "g576" "g577" "g578" "g579" "g580" "g583" "g585" "g586" "g589" "g591" "g592" "g595" "g597"
[361] "g599" "g607" "g610" "g611" "g612" "g613" "g614" "g615" "g616" "g617" "g618" "g619" "g620" "g631" "g638" "g650" "g653" "g657"
[379] "g660" "g663" "g666" "g669" "g672" "g681" "g689" "g691" "g694" "g696" "g700" "g707" "g709" "g711" "g715" "g718" "g719" "g722"
[397] "g724" "g726" "g743" "g744" "g747" "g748" "g749" "g751" "g752" "g753" "g754" "g755" "g756" "g757" "g758" "g759" "g760" "g761"
[415] "g762" "g763" "g764" "g765" "g766" "g767" "g768" "g769" "g770" "g776" "g781" "g782" "g783" "g784" "g785" "g786" "g787" "g788"
[433] "g789" "g790" "g795" "g796" "g801" "g802" "g803" "g804" "g805" "g806" "g807" "g808" "g809" "g810" "g812" "g814" "g818" "g820"
[451] "g822" "g824" "g831" "g832" "g833" "g834" "g835" "g836" "g837" "g838" "g839" "g840" "g843" "g849" "g850" "g851" "g854" "g861"
[469] "g862" "g863" "g864" "g865" "g866" "g867" "g868" "g869" "g870" "g872" "g874" "g882" "g884" "g891" "g892" "g893" "g894" "g895"
[487] "g896" "g897" "g898" "g899" "g900" "g903" "g909" "g910" "g911" "g919" "g922" "g926" "g931" "g932" "g933" "g934" "g935" "g936"
[505] "g937" "g938" "g939" "g940" "g945" "g947" "g948" "g951" "g952" "g956" "g963" "g965" "g970" "g971" "g972" "g973" "g974" "g975"
[523] "g976" "g977" "g978" "g979" "g980" "g985" "g989"
[[2]]
[1] "g1" "g14" "g18" "g21" "g22" "g24" "g25" "g26" "g29" "g39" "g40" "g41" "g48" "g51" "g52" "g53" "g54" "g55"
[19] "g56" "g57" "g58" "g59" "g60" "g61" "g62" "g63" "g64" "g65" "g66" "g67" "g68" "g69" "g70" "g71" "g72" "g73"
[37] "g74" "g75" "g76" "g77" "g78" "g79" "g80" "g81" "g82" "g83" "g84" "g85" "g86" "g87" "g88" "g89" "g90" "g91"
[55] "g92" "g93" "g94" "g95" "g96" "g97" "g98" "g99" "g100" "g101" "g102" "g103" "g104" "g105" "g106" "g107" "g108" "g109"
[73] "g110" "g111" "g112" "g113" "g114" "g115" "g116" "g117" "g118" "g119" "g120" "g130" "g131" "g132" "g133" "g134" "g135" "g136"
[91] "g137" "g138" "g139" "g140" "g142" "g147" "g148" "g151" "g152" "g153" "g154" "g155" "g156" "g157" "g158" "g159" "g160" "g165"
[109] "g171" "g172" "g173" "g174" "g175" "g176" "g177" "g178" "g179" "g180" "g196" "g200" "g204" "g210" "g233" "g252" "g260" "g290"
[127] "g297" "g301" "g321" "g329" "g332" "g333" "g334" "g335" "g344" "g351" "g352" "g353" "g354" "g355" "g356" "g357" "g358" "g359"
[145] "g360" "g361" "g362" "g363" "g364" "g365" "g366" "g367" "g368" "g369" "g370" "g379" "g381" "g382" "g383" "g384" "g385" "g386"
[163] "g387" "g388" "g389" "g390" "g391" "g396" "g400" "g401" "g402" "g403" "g404" "g405" "g406" "g407" "g408" "g409" "g410" "g415"
[181] "g417" "g418" "g431" "g432" "g433" "g434" "g435" "g436" "g437" "g438" "g439" "g440" "g445" "g450" "g455" "g461" "g462" "g463"
[199] "g464" "g465" "g466" "g467" "g468" "g469" "g470" "g476" "g491" "g492" "g493" "g494" "g495" "g496" "g497" "g498" "g499" "g500"
[217] "g510" "g513" "g515" "g518" "g519" "g520" "g522" "g527" "g531" "g532" "g533" "g534" "g535" "g536" "g537" "g538" "g539" "g540"
[235] "g544" "g547" "g558" "g561" "g569" "g570" "g571" "g572" "g573" "g574" "g575" "g576" "g577" "g578" "g579" "g580" "g583" "g585"
[253] "g586" "g591" "g599" "g611" "g612" "g613" "g614" "g615" "g616" "g617" "g618" "g619" "g620" "g650" "g657" "g663" "g669" "g689"
[271] "g691" "g694" "g744" "g751" "g752" "g753" "g754" "g755" "g756" "g757" "g758" "g759" "g760" "g761" "g762" "g763" "g764" "g765"
[289] "g766" "g767" "g768" "g769" "g770" "g781" "g782" "g783" "g784" "g785" "g786" "g787" "g788" "g789" "g790" "g801" "g802" "g803"
[307] "g804" "g805" "g806" "g807" "g808" "g809" "g810" "g814" "g831" "g832" "g833" "g834" "g835" "g836" "g837" "g838" "g839" "g840"
[325] "g849" "g850" "g851" "g854" "g861" "g862" "g863" "g864" "g865" "g866" "g867" "g868" "g869" "g870" "g872" "g891" "g892" "g893"
[343] "g894" "g895" "g896" "g897" "g898" "g899" "g900" "g910" "g919" "g926" "g931" "g932" "g933" "g934" "g935" "g936" "g937" "g938"
[361] "g939" "g940" "g947" "g951" "g970" "g971" "g972" "g973" "g974" "g975" "g976" "g977" "g978" "g979" "g980" "g985" "g989"
[[3]]
[1] "g18" "g48" "g55" "g58" "g60" "g66" "g73" "g79" "g83" "g91" "g93" "g94" "g99" "g105" "g109" "g132" "g135" "g137"
[19] "g138" "g172" "g260" "g290" "g301" "g329" "g352" "g355" "g362" "g363" "g368" "g383" "g388" "g389" "g390" "g391" "g406" "g417"
[37] "g431" "g432" "g440" "g461" "g462" "g498" "g519" "g527" "g531" "g532" "g538" "g572" "g575" "g576" "g577" "g585" "g615" "g619"
[55] "g663" "g669" "g751" "g753" "g766" "g768" "g788" "g802" "g804" "g838" "g851" "g854" "g864" "g892" "g893" "g898" "g919" "g932"
[73] "g971" "g980"
# Mean expression of each gene (each column of d2), ignoring missing values.
# vapply() with numeric(1) guarantees a named numeric vector with one value
# per column, whereas the return type of sapply() depends on its input.
# A column with no observed values at all yields NaN.
mean_expression_of_each_gene <- vapply(
  d2,
  function(expression_values) mean(expression_values, na.rm = TRUE),
  numeric(1)
)
# make data frame: one row per gene with its name and mean expression
mean_expression_of_each_gene_d <- data.frame(
  gene_name = names(mean_expression_of_each_gene),
  mean_expression = as.numeric(mean_expression_of_each_gene)
)
# check the contents
mean_expression_of_each_gene_d
# Replace missing values with the mean expression value of each gene.
# The mean is computed directly from the column being processed, so this
# step does not depend on a separately built lookup table matched by name.
# A column with no observed values has mean NaN, so its entries become NaN.
# make new data frame d3
d3 <- d2 |>
  mutate(across(
    everything(),
    \(values) replace(values, is.na(values), mean(values, na.rm = TRUE))
  ))
# check the contents
d3